###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file portion produces ideology scores for candidates using coefficients from the ideology inference model produced in prior file and in Figure 1
########################################################
# inputs :: /data/cces_stacked_unmatched.Rdata
# 			 :: 'core_for_ggplot_dem_global.csv'
#				 :: 'core_for_ggplot_rep_global.csv'
# 			 :: 'core_for_ggplot_global.csv'
# 			 :: 'candidate_matrix.csv'


# outputs ::
# => scoreMats.Rdata, which combines the above .csv coefficient files and allows for within- vs. across-party comparisons
# => candidate_matrix.csv (overwritten in place; formerly written as candidate_matrix_scored.csv), which contains the candidate matrix w/ ideology score estimates for use in later analyses

#dirs="~/Dropbox/replication0/"
#dirs should be set here or in runR.R

# Start from a clean workspace, keeping only the user-supplied `dirs` path.
rm(list=setdiff(ls(),'dirs'))
library(ggplot2)
library(stringr)

# Reorder the rows of a coefficient table so they follow a given label order.
#
# x : data frame with an `iv_order` column holding attribute-level labels
# o : vector of labels giving the desired row order
#
# Returns x[ix,] with exactly one row per element of `o`, in that order
# (the first matching row is used if a label occurs more than once in `x`).
# Stops with an informative error if any label in `o` is absent from
# x$iv_order.
reOrder=function(x,o){
	# match() yields one index per element of `o`, so the result always has
	# length(o) rows; sizing the index by nrow(x) would append all-NA rows
	# whenever `x` has more rows than `o` has labels.
	ix=match(o,x$iv_order)
	if(anyNA(ix)){
		stop("reOrder: labels not found in x$iv_order: ",
			paste(o[is.na(ix)],collapse="; "),call.=FALSE)
	}
	return(x[ix,])
}

# Work from the replication root given by `dirs` (set before this script runs);
# every path below is relative to it.
setwd(dirs)
# Load the stacked survey data; per the notes at the end of this file it is
# expected to provide `cces_stacked`, which is used further down to recover
# the `cq` values.
load("data/cces_stacked_unmatched.Rdata")

# Row order for the 65 conjoint attribute levels. reOrder() compares these
# labels against the `iv_order` column of each coefficient table, so every
# string must match that column exactly (spelling, spacing and punctuation).
o.order <- c(
"Gender - Male",
"Gender - Female",
"Race - White",
"Race - Black",
"Race - Hispanic",
"Religion - None",
"Religion - Catholic",
"Religion - Evangelical Protestant",
"Religion - Protestant",
"Occupation - Attorney",
"Occupation - CEO",
"Occupation - City Council Member",
"Occupation - Factory Foreman",
"Occupation - Farmer",
"Occupation - Former US Army Major",
"Occupation - Political Staffer",
"Occupation - Small Business Owner",
"Occupation - State Legislator",
"Occupation - Teacher",
"Personality - Decent",
"Personality - Compassionate",
"Personality - Empathetic",
"Personality - Inspiring",
"Personality - Intelligent",
"Personality - Knowledgeable",
"Personality - Moral",
"Personality - Strong Leader",
"Endorsements - Major area newspapers",
"Endorsements - Business groups",
"Endorsements - Christian groups",
"Endorsements - Civil rights groups",
"Endorsements - Energy groups",
"Endorsements - Environmental groups",
"Endorsements - Gun control groups",
"Endorsements - Gun rights groups",
"Endorsements - Labor unions",
"Endorsements - Reproductive rights groups",
"Endorsements - Tax reform groups",
"Endorsements - Tea Party groups",
"Endorsements - Veterans groups",
"Record - Help my constituents get the benefits they deserve",
"Record - Refuse to compromise my principles even when it means taking on my party",
"Record - Secure appointment to a powerful legislative committee",
"Record - Stand with my party to do what's right",
"Record - Work across the aisle to get things done",
"Issue - Promote expanding free trade agreements",
"Issue - Raise taxes on those making more than $250,000 a year",
"Issue - Cut taxes on income and capital gains for all",
"Issue - Defend the rights of LGBT individuals",
"Issue - Defend traditional marriage and religious beliefs",
"Issue - Expand domestic oil and gas production through drilling",
"Issue - Expand government and unemployment assistance for those in need",
"Issue - Prevent and prosecute abuse of government assistance programs",
"Issue - Protect a woman's right to choose",
"Issue - Protect gun owners' rights to defend themselves and others",
"Issue - Protect jobs and industry from unfair foreign trade",
"Issue - Protect the lives of the unborn",
"Issue - Provide a path to citizenship for undocumented immigrants",
"Issue - Reduce the size of military and number of military bases",
"Issue - Reform policing and stop racial profiling",
"Issue - Regulate CO2 emissions to combat global warming",
"Issue - Strengthen border security to stop illegal immigration",
"Issue - Strengthen gun control through commonsense restrictions",
"Issue - Strengthen our military and national defense",
"Issue - Toughen sentences and penalties for criminals")

# Read the three coefficient tables written by the ideology inference step
# (the `dem`, `rep` and `global` files).
dem_core_for_ggplot <- read.csv("data/core_for_ggplot_dem_global.csv")
rep_core_for_ggplot <- read.csv("data/core_for_ggplot_rep_global.csv")
all_core_for_ggplot <- read.csv("data/core_for_ggplot_global.csv")

# Turn one coefficient table into a score table:
#  - put the rows in `o.order` order and keep the first three columns,
#  - name the first column 'var_names',
#  - store columns 1 and 2 as character,
#  - rewrite every 'i1' in the variable names as 'i'.
# The three tables previously went through three hand-copied versions of
# these steps; a single helper keeps them from drifting apart.
build_score_table <- function(core) {
  tab <- reOrder(x = core, o = o.order)[, 1:3]
  names(tab)[1] <- 'var_names'
  # `replacement` is spelled out in full rather than relying on partial
  # argument matching of `replace`.
  tab[, 1] <- gsub(pattern = 'i1', replacement = 'i', x = as.character(tab[, 1]))
  tab[, 2] <- as.character(tab[, 2])
  tab
}

all_score <- build_score_table(all_core_for_ggplot)
dem_score <- build_score_table(dem_core_for_ggplot)
rep_score <- build_score_table(rep_core_for_ggplot)

# Persist the three score tables together for later scripts.
save(all_score, rep_score, dem_score, file = 'data/scoreMats.Rdata')

# Reload the score tables (all_score, rep_score, dem_score) from disk.
#path='~/Dropbox/IdeologyPrimaryConjoints/merged/AnalysisMatched/'
#load(paste(path,'scoreMats.Rdata',sep=''))
load('data/scoreMats.Rdata')

# Candidate profile matrix; the first CSV column is dropped on read.
candidate_matrix <- read.csv("data/candidate_matrix.csv", header = TRUE, stringsAsFactors = FALSE)[, -c(1)]

# full item scores
# Select the item columns: names that contain '_' but none of 'dv_', 'i1', 'i2'.
iy <- which(
  !grepl(pattern = 'dv_', x = names(candidate_matrix)) &
  !grepl(pattern = 'i1', x = names(candidate_matrix)) &
  !grepl(pattern = 'i2', x = names(candidate_matrix)) &
  grepl(pattern = '_', x = names(candidate_matrix))
)

# Sort the item columns and the coefficient rows by name so that column j of
# `vec` lines up with row j of `score_mat`.
vec <- candidate_matrix[, iy]
vec <- vec[, order(names(vec))]

score_mat <- all_score[order(all_score[, 1]), ]

# Share of the first 65 columns whose name equals the coefficient row name
# (should be 1).
mean(score_mat[1:65,1]==names(vec)[1:65])
# The value above is not shown when the script is source()d, so a mismatch
# would silently produce wrong scores; surface it without stopping the run.
if (!isTRUE(all(score_mat[1:65, 1] == names(vec)[1:65]))) {
  warning("full item scores: candidate_matrix item columns do not line up with all_score rows; scores may be misaligned", call. = FALSE)
}

# Weight each item column by the coefficient in column 3 of its score row;
# missing item values are set to 0 first.
vec_mat <- matrix(0, nrow(vec), ncol(vec))
for (j in seq_len(ncol(vec_mat))) {
  vec[which(is.na(vec[, j])), j] <- 0
  vec_mat[, j] <- vec[, j] * score_mat[j, 3]
}

library(matrixStats)
scores <- rowMeans(vec_mat, na.rm = TRUE)
scores[which(scores == 0)] <- NA # eliminate complete nulls

# alvarez-like measure of uncertainty/inconsistency
scores_var <- rowVars(vec_mat, na.rm = TRUE)

# center and standardize
scores_centered <- scores - mean(scores, na.rm = TRUE)
scores <- scores_centered / sd(scores_centered, na.rm = TRUE)

# pty is 0 when `conjoints` is 1, 3, 5 or 7 and 1 otherwise (NA stays NA).
candidate_matrix$pty <- with(
  candidate_matrix,
  ifelse(conjoints == 1 | conjoints == 3 | conjoints == 5 | conjoints == 7, 0, 1)
)
candidate_matrix$scores <- scores

# Numeric `cq` code: strip the 'Text ' prefix from the cces_stacked column
# whose name contains 'cq'.
# NOTE(review): assumes exactly one column name matches 'cq' -- confirm.
cq <- as.numeric(gsub(
  pattern = 'Text ',
  replacement = '',
  x = cces_stacked[, grep(pattern = 'cq', x = names(cces_stacked))]
))
#cq[which(cq==4)]=4
#1. toss up
#2. safe R
#3. safe D
#4. generic

# The cq vector is repeated twice to fill candidate_matrix.
# NOTE(review): presumably candidate_matrix stacks two blocks of rows in
# cces_stacked order -- confirm against the script that builds it.
candidate_matrix$cq <- c(cq, cq)
#write.csv(candidate_matrix,file='data/candidate_matrix_scored.csv')

candidate_matrix$scores_var <- scores_var

##############################
# policy-specific scores
##############################

# Select every column whose name contains 'i_' or 'e_'.
ix <- which(
  #!grepl(names(candidate_matrix),pattern='dv_') &
  #!grepl(names(candidate_matrix),pattern='i1') &
  #!grepl(names(candidate_matrix),pattern='i2') &
  grepl(pattern = 'i_', x = names(candidate_matrix)) |
  grepl(pattern = 'e_', x = names(candidate_matrix))
)

# Sort the selected columns and the matching coefficient rows by name so that
# column j of `vec` lines up with row j of `score_mat`.
vec <- candidate_matrix[, ix]
vec <- vec[, order(names(vec))]

ii <- c(
  grep(pattern = 'i_', x = all_score[, 1]),
  grep(pattern = 'e_', x = all_score[, 1])
)
score_mat <- all_score[ii, ][order(all_score[ii, 1]), ]

# Share of the first 34 columns whose name equals the coefficient row name
# (should be 1).
# NOTE(review): o.order lists 13 endorsement and 20 issue levels (33 in all),
# while this check spans 34 columns -- confirm which additional variable the
# unanchored 'i_' / 'e_' patterns pick up.
mean(score_mat[1:34,1]==names(vec)[1:34])
# The value above is not shown when the script is source()d, so a mismatch
# would silently produce wrong scores; surface it without stopping the run.
if (!isTRUE(all(score_mat[1:34, 1] == names(vec)[1:34]))) {
  warning("policy scores: candidate_matrix policy columns do not line up with all_score rows; scores may be misaligned", call. = FALSE)
}

# Weight each column by the coefficient in column 3 of its score row;
# missing values are set to 0 first.
vec_mat <- matrix(0, nrow(vec), ncol(vec))
for (j in seq_len(ncol(vec_mat))) {
  vec[which(is.na(vec[, j])), j] <- 0
  vec_mat[, j] <- vec[, j] * score_mat[j, 3]
}

library(matrixStats)
scores <- rowMeans(vec_mat, na.rm = TRUE)
scores[which(scores == 0)] <- NA # eliminate complete nulls

# alvarez-like measure of uncertainty/inconsistency
scores_var <- rowVars(vec_mat, na.rm = TRUE)

# center and standardize
scores_centered <- scores - mean(scores, na.rm = TRUE)
scores <- scores_centered / sd(scores_centered, na.rm = TRUE)

#candidate_matrix$pty <- ifelse(candidate_matrix$conjoints==1 | candidate_matrix$conjoints==3 | candidate_matrix$conjoints==5 | candidate_matrix$conjoints==7,0,1)
candidate_matrix$scores_policy <- scores
candidate_matrix$scores_policy_var <- scores_var

#write.csv(candidate_matrix,file='data/candidate_matrix_scored.csv')
# overwriting older file
# write.csv() writes row names as the first column by default; the read.csv()
# call on this file earlier in the script drops the first column, so the
# default is kept on purpose.
write.csv(candidate_matrix, file = 'data/candidate_matrix.csv')

#END 2_build_libcon_score_ii

# trying to keep everything together in the data
# -- cces_stacked in data/cces_stacked_unmatched.Rdata
# -- voter_matrix in data/voter_matrix.csv
# -- candidate_matrix in data/candidate_matrix.csv (revised)
# -- core_for_ggplot in data/core_for_ggplot_global.csv (pooled)
# -- core_for_ggplot in data/core_for_ggplot_dem_global.csv (for dems)
# -- core_for_ggplot in data/core_for_ggplot_rep_global.csv (for reps)
# -- all_score,rep_score,dem_score, in data/scoreMats.Rdata
